{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "51d52d39",
   "metadata": {},
   "source": [
    "# Noun chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3568b1ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "%run -i \"../util/file_utils.ipynb\"\n",
    "%run -i \"../util/lang_utils.ipynb\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fc41d806",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_noun_chunks(text, model):\n",
    "    doc = model(text)\n",
    "    for noun_chunk in doc.noun_chunks:\n",
    "        print(noun_chunk.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "40d23c2e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sherlock Holmes\n",
      "she\n",
      "the_ woman\n",
      "I\n",
      "him\n",
      "her\n",
      "any other name\n",
      "his eyes\n",
      "she\n",
      "the whole\n",
      "her sex\n",
      "It\n",
      "he\n",
      "any emotion\n",
      "Irene Adler\n",
      "All emotions\n",
      "his cold, precise but admirably balanced mind\n",
      "He\n",
      "I\n",
      "it\n",
      "the world\n",
      "a lover\n",
      "he\n",
      "himself\n",
      "a\n",
      "false position\n",
      "He\n",
      "the softer passions\n",
      "a gibe\n",
      "a sneer\n",
      "They\n",
      "admirable things\n",
      "the observer\n",
      "the veil\n",
      "men’s motives\n",
      "actions\n",
      "the trained\n",
      "reasoner\n",
      "such intrusions\n",
      "his own delicate and finely\n",
      "adjusted temperament\n",
      "a distracting factor\n",
      "which\n",
      "a doubt\n",
      "all his mental results\n",
      "Grit\n",
      "a sensitive\n",
      "instrument\n",
      "a crack\n",
      "his own high-power lenses\n",
      "a strong emotion\n",
      "a nature\n",
      "his\n",
      "one woman\n",
      "him\n",
      "that woman\n",
      "the late Irene\n",
      "Adler\n",
      "dubious and questionable memory\n"
     ]
    }
   ],
   "source": [
    "sherlock_holmes_part_of_text = read_text_file(\"../data/sherlock_holmes_1.txt\")\n",
    "print_noun_chunks(sherlock_holmes_part_of_text, small_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "986d4ba8",
   "metadata": {},
   "source": [
    "# Explore noun chunk properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "543a4eec",
   "metadata": {},
   "outputs": [],
   "source": [
    "def explore_properties(sentence, model):\n",
    "    doc = model(sentence)\n",
    "    other_span = \"emotions\"\n",
    "    other_doc = model(other_span)\n",
    "    for noun_chunk in doc.noun_chunks:\n",
    "        print(noun_chunk.text)\n",
    "        print(\"Noun chunk start and end\", \"\\t\", noun_chunk.start, \"\\t\", noun_chunk.end)\n",
    "        print(\"Noun chunk sentence:\", noun_chunk.sent)\n",
    "        print(\"Noun chunk root:\", noun_chunk.root.text)\n",
    "        print(f\"Noun chunk similarity to '{other_span}'\", noun_chunk.similarity(other_doc))\n",
    "    print(f\"Similarity of the sentence '{sentence}' to '{other_span}':\", doc.similarity(other_doc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "824f8805",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All emotions\n",
      "Noun chunk start and end \t 0 \t 2\n",
      "Noun chunk sentence: All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\n",
      "Noun chunk root: emotions\n",
      "Noun chunk similarity to 'emotions' 0.4026421588260174\n",
      "his cold, precise but admirably balanced mind\n",
      "Noun chunk start and end \t 11 \t 19\n",
      "Noun chunk sentence: All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\n",
      "Noun chunk root: mind\n",
      "Noun chunk similarity to 'emotions' -0.036891259527462\n",
      "Similarity of the sentence 'All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.' to 'emotions': 0.03174900767577446\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1807/2430050149.py:10: UserWarning: [W007] The model you're using has no word vectors loaded, so the result of the Span.similarity method will be based on the tagger, parser and NER, which may not give useful similarity judgements. This may happen if you're using one of the small models, e.g. `en_core_web_sm`, which don't ship with word vectors and only use context-sensitive tensors. You can always add your own word vectors, or use one of the larger models instead if available.\n",
      "  print(f\"Noun chunk similarity to '{other_span}'\", noun_chunk.similarity(other_doc))\n",
      "/tmp/ipykernel_1807/2430050149.py:11: UserWarning: [W007] The model you're using has no word vectors loaded, so the result of the Doc.similarity method will be based on the tagger, parser and NER, which may not give useful similarity judgements. This may happen if you're using one of the small models, e.g. `en_core_web_sm`, which don't ship with word vectors and only use context-sensitive tensors. You can always add your own word vectors, or use one of the larger models instead if available.\n",
      "  print(f\"Similarity of the sentence '{sentence}' to '{other_span}':\", doc.similarity(other_doc))\n"
     ]
    }
   ],
   "source": [
    "sentence = \"All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\"\n",
    "explore_properties(sentence, small_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "57645e29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All emotions\n",
      "Noun chunk start and end \t 0 \t 2\n",
      "Noun chunk sentence: All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\n",
      "Noun chunk root: emotions\n",
      "Noun chunk similarity to 'emotions' 0.6302678068015664\n",
      "his cold, precise but admirably balanced mind\n",
      "Noun chunk start and end \t 11 \t 19\n",
      "Noun chunk sentence: All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\n",
      "Noun chunk root: mind\n",
      "Noun chunk similarity to 'emotions' 0.5744456705692561\n",
      "Similarity of the sentence 'All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.' to 'emotions': 0.640366414527618\n"
     ]
    }
   ],
   "source": [
    "sentence = \"All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind.\"\n",
    "explore_properties(sentence, large_model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
